#!/bin/bash

# author: sunning
# created: 2022年9月14日15:25:18
# updated: 2022年11月29日18:58:44
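# desc: Run script for MSA jobs.
#       Dataset paths referenced in this script are absolute paths on the Aofei cluster.
# The script accepts two arguments, passed in --name=value form:
# --input_dir=<dir>   directory containing the FASTA files of the sequences to search
# --output_dir=<dir>  directory where results are written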

# Pull in the system-wide profile snippets so that tools they set up are
# available in this shell. Unreadable or missing files are skipped.
# When the shell is not interactive ($- has no "i" flag), anything the
# snippets print on stdout is discarded.
for i in /etc/profile.d/*.sh /etc/profile.d/sh.local; do
  [ -r "$i" ] || continue
  case "$-" in
    *i*)
      . "$i"
      ;;
    *)
      . "$i" >/dev/null
      ;;
  esac
done

#######################################
# Parse command-line options into global variables.
# Recognized options:
#   --input_dir=DIR   or  --input_dir DIR    -> input_dir
#   --output_dir=DIR  or  --output_dir DIR   -> output_dir
#   --default                                -> DEFAULT=YES
# Any other argument is ignored. For the space-separated form, a following
# word that is empty or starts with "--" is not taken as the value; a warning
# is printed to stderr and the option is skipped.
# Globals:   input_dir, output_dir, DEFAULT (written)
# Arguments: the command-line arguments to parse
# Returns:   0
#######################################
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --input_dir=*)
        # Strip everything up to the first "=" so values may contain "=".
        input_dir="${1#*=}"
        ;;
      --output_dir=*)
        output_dir="${1#*=}"
        ;;
      --input_dir | --output_dir)
        case "${2-}" in
          '' | --*)
            echo "option $1 requires a value" >&2
            ;;
          *)
            if [ "$1" = "--input_dir" ]; then
              input_dir="$2"
            else
              output_dir="$2"
            fi
            shift # consume the value; the option itself is shifted below
            ;;
        esac
        ;;
      --default)
        DEFAULT=YES
        ;;
      *)
        ;;
    esac
    shift
  done
}

parse_args "$@"

# Validate the directories before any work starts.
# Expansions are quoted so that paths containing spaces or glob characters are
# tested as a single word; unquoted, such a path makes the test itself error
# out and the check is silently bypassed.
# Diagnostics go to stderr; the script exits with status 1 on any failure.

# input_dir must be set and must name an existing directory.
if [[ -z "${input_dir:-}" || ! -d "$input_dir" ]]; then
  echo "please input data directory containing fasta file" >&2
  exit 1
fi

# output_dir must be set; it does not have to exist yet.
if [[ -z "${output_dir:-}" ]]; then
  echo "please input valid output_dir!" >&2
  exit 1
fi

# Create output_dir (and any missing parents), stopping if that fails,
# e.g. because of permissions or because the path is an existing file.
if [[ ! -d "$output_dir" ]]; then
  if ! mkdir -p -- "$output_dir"; then
    echo "failed to create output directory: $output_dir" >&2
    exit 1
  fi
fi

# Launch the MSA job through srun.
# Reads the globals input_dir and output_dir. The script's exit status is
# srun's exit status, or 1 if a precondition below fails.

# Resolve both directories to absolute paths while still in the caller's
# working directory: the script changes directory below, and a relative path
# would otherwise be interpreted relative to the program directory.
# CDPATH is cleared for the lookup so a relative name cannot resolve elsewhere.
abs_input_dir=$(CDPATH= cd -- "$input_dir" && pwd) || {
  echo "cannot access input directory: $input_dir" >&2
  exit 1
}
abs_output_dir=$(CDPATH= cd -- "$output_dir" && pwd) || {
  echo "cannot access output directory: $output_dir" >&2
  exit 1
}

module load anaconda/4.5.11
source activate msa

# The search tools are looked up on PATH after the environment is activated.
# A missing tool is reported here; if its path expanded to nothing, the
# following flag would be consumed as its value.
for tool in jackhmmer hhblits hhsearch kalign; do
  if ! type -P "$tool" >/dev/null; then
    echo "required tool not found in PATH: $tool" >&2
    exit 1
  fi
done

# MSA_multi0.py is invoked by relative name, so the cd must succeed.
cd /mnt/oss/dros/common/ecosystem/MEDICINE_COMPUTING/apps/MSA/PROGRAM/ || {
  echo "cannot change to MSA program directory" >&2
  exit 1
}

echo "========== start running MSA job =========="

# Arguments are collected in an array so each value stays a single word.
msa_args=(
  "$abs_input_dir"
  --output_dir "$abs_output_dir"
  --pdb70_database_path /hpc/software/MSA/data/pdb70/pdb70
  --uniref90_database_path /hpc/software/MSA/data/uniref90/uniref90.fasta
  --uniclust30_database_path /hpc/software/MSA/data/uniclust30/uniclust30/uniclust30_2018_08/uniclust30_2018_08
  --mgnify_database_path /hpc/software/MSA/data/mgnify/mgnify/mgy_clusters_2018_12.fa
  --bfd_database_path /hpc/software/MSA/dataset/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt
  --jackhmmer_binary_path "$(type -P jackhmmer)"
  --hhblits_binary_path "$(type -P hhblits)"
  --hhsearch_binary_path "$(type -P hhsearch)"
  --kalign_binary_path "$(type -P kalign)"
  --cpus 32
  --no_tasks 1
)

srun python MSA_multi0.py "${msa_args[@]}"


